package whf.framework.lucene.analyzer;

import java.io.Reader;
import java.util.Set;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.TokenStream;

/**
 * Lucene {@link Analyzer} that tokenizes text with {@code CJKTokenizer} and
 * then drops every token contained in {@link #STOP_WORDS} by wrapping the
 * tokenizer in a {@link StopFilter}.
 *
 * <p>The stop set is built once per analyzer instance, in the constructor, and
 * reused for every stream returned by {@link #tokenStream(String, Reader)}.
 *
 * @author solo L
 */
public class CJKAnalyzer extends Analyzer { // extends Analyzer, as Lucene requires of every analyzer
    /**
     * Words removed from the token stream. Note that the list also contains
     * the empty string and "www" in addition to common English function words.
     *
     * <p>NOTE(review): this is a public array and therefore mutable by callers;
     * changes made after an analyzer has been constructed do not affect that
     * analyzer, because the stop set is derived from the array at construction
     * time.
     */
    public final static String[] STOP_WORDS = {
        "a", "and", "are", "as", "at", "be", "but", "by", "for", "if", "in", "into", "is", "it", "no", "not", "of", "on",
        "or", "s", "such", "t", "that", "the", "their", "then", "there", "these", "they", "this", "to", "was", "will", "with", "", "www"
    };

    /**
     * Stop set derived from {@link #STOP_WORDS}. Declared with an unbounded
     * wildcard so no raw type (and no class-wide warning suppression) is
     * needed; this class only passes the set through to {@link StopFilter}.
     */
    private final Set<?> stopTable;

    /**
     * Creates an analyzer whose stop set is built from {@link #STOP_WORDS}.
     */
    public CJKAnalyzer() {
        this.stopTable = StopFilter.makeStopSet(STOP_WORDS);
    }

    /**
     * Builds the token stream for a field: a {@code CJKTokenizer} over
     * {@code reader}, filtered through a {@link StopFilter} using this
     * analyzer's stop set.
     *
     * @param fieldName name of the field being analyzed; not used by this implementation
     * @param reader    source of the text to tokenize
     * @return the stop-word-filtered token stream
     */
    @Override
    public TokenStream tokenStream(String fieldName, Reader reader) {
        TokenStream tokens = new CJKTokenizer(reader);
        return new StopFilter(tokens, stopTable);
    }
}